
* First positional argument: the project root directory.
global root_dir = "`1'"

* Project configuration. Presumably defines the directory globals used in this
* file (log_dir, commondata_dir, final_dir) -- TODO confirm against config.do.
include "$root_dir/code/config/config.do"


* Open a named log for this do-file. The path is quoted so that a log_dir
* containing spaces still opens the log; "cap noi" reports a failure to open
* the log without aborting the run.
cap noi log using "${log_dir}/BvD_industry.log", replace name(dat)

* Positional arguments 2-5 may arrive as the literal placeholder ___EMPTY___.
* Copy each of them into $arg1-$arg4, turning the placeholder into an empty string.
forvalues i = 1/4 {
    local pos = `i' + 1
    global arg`i' = cond("``pos''" == "___EMPTY___", "", "``pos''")
}

* Only the arguments that were actually supplied overwrite the weight globals;
* an empty argument leaves the corresponding global untouched.
if !missing("$arg1") {
    global weight_category "$arg1"
    display "Weight category: ${weight_category}"
}

if !missing("$arg2") {
    global weight_versions "$arg2"
    display "Weight versions: ${weight_versions}"
}

if !missing("$arg3") {
    global weight_window "$arg3"
    display "Weight window: ${weight_window}"
}

* wtype is echoed whether or not it was overridden by the fifth argument.
if !missing("$arg4") global wtype "$arg4"
display "${wtype}"

capture noisily {


******************************************
*This do-file maps firms to NACE sectors *
******************************************

* --- Merge map: source firm ID -> target firm ID ---------------------------
* The source ID is renamed to BvDIDnumber so it can serve as the merge key below.
import delimited using "${commondata_dir}/orbis_patents/firm_merge_map.csv", varnames(1) clear
rename bvdid_source BvDIDnumber
tempfile f
save `f'

* --- Patent counts and group membership per firm ---------------------------
* Orbis patents lists every application of every firm together with its
* application year; counting the non-missing years per firm gives the number
* of patent applications (restricted to 1980-2014).
use "${commondata_dir}/orbis_patents/Orbis_patents_list_2017.dta", clear
keep BvD appln_year
keep if inrange(appln_year, 1980, 2014)
collapse (count) num_patents = appln_year, by(BvD)

mmerge BvDIDnumber using `f', unmatched(master)

* A firm found in the merge map is assigned to its target's group;
* every other firm forms its own group.
gen group = BvD
replace group = bvdid_target if _merge == 3
drop _merge bvdid_target
tempfile groups
save `groups'

* --- Firm -> NACE division map ---------------------------------------------
use "${commondata_dir}/orbis_patents/Orbis_patents_2017_nace_country_updated.dta", clear

* The division is the first two characters of the NACE code.
gen division = substr(nace,1,2)

keep BvD division
duplicates drop

mmerge BvD using `groups', unmatched(using)

* Firms with patent counts but no NACE record (using-only rows), and rows whose
* division is the literal "NA", carry no usable division.
drop if _merge == 2 | division == "NA"
drop _merge BvD

* Within a group, discard "Other" mappings whenever the group is also associated
* with at least one division that is not "Other". After this drop a group can no
* longer mix "Other" with other divisions, so no separate consistency check is needed.
* NOTE(review): division holds at most two characters (substr(nace,1,2)), so as
* written it can never equal "Other" and this step drops nothing -- confirm
* whether a recode of unlisted divisions to "Other" is missing upstream.
bysort group: egen byte has_listed = max(division != "Other")
drop if division == "Other" & has_listed
drop has_listed

* --- Pick the dominant division of each group ------------------------------
* weight = share of the group's patent count attributed to each division.
bysort group division: egen division_pats = total(num_patents)
bysort group: egen totpats = total(num_patents)
gen weight = division_pats / totpats
bysort group: egen max_weight = max(weight)

* Keep only the top-weighted division(s); an empty division is never eligible.
* NOTE(review): a group whose top-weighted division is empty is therefore
* dropped entirely rather than falling back to its next division -- confirm
* that this is intended.
keep if weight == max_weight & division != ""

* Several divisions can tie for the top weight (the original author counted
* 306 firms with equal weights on two divisions). The tie is broken
* deterministically by keeping the alphabetically first division, so repeated
* runs produce the same output regardless of how sort orders tied rows.
bysort group (division): keep if _n == 1

rename division industry
rename group BvDIDnumber
keep BvDIDnumber industry

save "${final_dir}/BvD_industry.dta", replace


}
* _rc still holds the return code of the preceding capture block:
* zero means every command in it ran without error.
local outcome = cond(_rc == 0, "successfully", "with errors")
display "Execution finished `outcome'."

* Close the log opened under the name "dat"; ignore the error if it is not open.
capture log close dat
